1 Introduction

The purpose of this notebook is to integrate all B cells into a single Seurat object.

2 Pre-processing

2.1 Load packages

library(Seurat)
library(SeuratWrappers)
library(harmony)
library(tidyverse)

2.2 Parameters

path_to_obj_general <- here::here("scATAC-seq/results/R_objects/8.3.tonsil_peakcalling_annotation_level1_signature.rds")

path_to_NBC_MBC <- here::here("scATAC-seq/results/R_objects/level_3/NBC_MBC/NBC_MBC_integrated_level_3.rds")
path_to_GCBC <- here::here("scATAC-seq/results/R_objects/level_3/GCBC/GCBC_integrated_level_3.rds")
path_to_PC <- here::here("scATAC-seq/results/R_objects/level_3/PC/PC_integrated_level_3.rds")

path_to_NBC_MBC_RNA <- here::here("scRNA-seq/3-clustering/3-level_3/NBC_MBC_clustered_level_3_with_pre_freeze.rds")
path_to_GCBC_RNA <- here::here("scRNA-seq/3-clustering/3-level_3/GCBC_clustered_level_3_with_pre_freeze.rds")
path_to_PC_RNA <- here::here("scRNA-seq/3-clustering/3-level_3/PC_clustered_level_3_with_pre_freeze.rds")

path_to_save <- here::here("scATAC-seq/results/R_objects/B_cells_integrated.rds")

2.3 Load data

NBC_MBC_RNA <- readRDS(path_to_NBC_MBC_RNA)
NBC_MBC_RNA
## An object of class Seurat 
## 37378 features across 126532 samples within 1 assay 
## Active assay: RNA (37378 features, 0 variable features)
##  3 dimensional reductions calculated: pca, umap, harmony
GCBC_RNA <- readRDS(path_to_GCBC_RNA)
GCBC_RNA
## An object of class Seurat 
## 37378 features across 81653 samples within 1 assay 
## Active assay: RNA (37378 features, 0 variable features)
##  3 dimensional reductions calculated: pca, umap, harmony
PC_RNA <- readRDS(path_to_PC_RNA)
PC_RNA
## An object of class Seurat 
## 37378 features across 10277 samples within 1 assay 
## Active assay: RNA (37378 features, 0 variable features)
##  3 dimensional reductions calculated: pca, umap, harmony
tonsil <- readRDS(path_to_obj_general)
tonsil
## An object of class Seurat 
## 270784 features across 101075 samples within 1 assay 
## Active assay: peaks_macs (270784 features, 270784 variable features)
##  3 dimensional reductions calculated: umap, lsi, harmony
B_cell_lineage <- subset(tonsil, annotation_level_1 == "NBC_MBC" |
                          annotation_level_1 == "GCBC" |
                          annotation_level_1 == "PC" )
B_cell_lineage
## An object of class Seurat 
## 270784 features across 71130 samples within 1 assay 
## Active assay: peaks_macs (270784 features, 270784 variable features)
##  3 dimensional reductions calculated: umap, lsi, harmony
DimPlot(
  B_cell_lineage,
  pt.size = 0.2,
  group.by = "annotation_level_1"
)

nbc_mbc <- readRDS(path_to_NBC_MBC)
gcbc <- readRDS(path_to_GCBC)
pc <- readRDS(path_to_PC)

2.4 Clean the data taking into account the level 3

We exclude the clusters of doublets or poor-quality cells detected at level 3.

selected_cells <- c(colnames(nbc_mbc),colnames(gcbc),colnames(pc))

B_cell_lineage_clean <- subset(B_cell_lineage, 
                               cells = selected_cells)
B_cell_lineage_clean
## An object of class Seurat 
## 270784 features across 64847 samples within 1 assay 
## Active assay: peaks_macs (270784 features, 270784 variable features)
##  3 dimensional reductions calculated: umap, lsi, harmony
DimPlot(
  B_cell_lineage_clean,
  pt.size = 0.2,
  group.by = "annotation_level_1"
)

3 Integration of the B-cell lineage data.

3.1 Normalization, dimensionality reduction

B_cell_lineage_clean <-  RunTFIDF(B_cell_lineage_clean) %>% 
FindTopFeatures(min.cutoff = 10) %>% RunSVD()  

DepthCor(B_cell_lineage_clean)

B_cell_lineage_clean <- RunUMAP(object = B_cell_lineage_clean, 
                                reduction = 'lsi', 
                                dims = 2:40)
DimPlot(B_cell_lineage_clean, 
        group.by ="assay")

3.2 Batch correction

B_cell_lineage_clean <- RunHarmony(
  object = B_cell_lineage_clean,
  dims = 2:40,
  group.by.vars = 'gem_id',
  reduction = 'lsi',
  assay.use = 'peaks_macs',
  project.dim = FALSE,
  max.iter.harmony = 20
)

# Non-linear dimension reduction and clustering
B_cell_lineage_clean <- RunUMAP(B_cell_lineage_clean, 
                    dims = 2:40, 
                    reduction = 'harmony') 

B_cell_lineage_clean <- FindNeighbors(object = B_cell_lineage_clean, 
                                      reduction = 'harmony', 
                                      dims = 2:40)
B_cell_lineage_clean <- FindClusters(object = B_cell_lineage_clean, 
                                     verbose = FALSE,
                                     resolution = 0.18)
DimPlot(object = B_cell_lineage_clean, label = TRUE) + NoLegend()

4 Annotation

We used the clusters defined at level 3 in the scRNA-seq data.

cluster_Naive  <- colnames(NBC_MBC_RNA)[NBC_MBC_RNA$seurat_clusters == 0 | NBC_MBC_RNA$seurat_clusters == 2]

DimPlot(B_cell_lineage_clean, 
  cols.highlight = "darkred", cols= "grey",
  cells.highlight= c(cluster_Naive),
  pt.size = 0.1
)

cluster_Memory_cs  <- colnames(NBC_MBC_RNA)[NBC_MBC_RNA$seurat_clusters == 1]
cluster_Memory_ncs  <- colnames(NBC_MBC_RNA)[NBC_MBC_RNA$seurat_clusters == 3]

DimPlot(B_cell_lineage_clean, 
  cols.highlight = "brown1", cols= "grey",
  cells.highlight= c(cluster_Memory_cs),
  pt.size = 0.1
)

DimPlot(B_cell_lineage_clean, 
  cols.highlight = "brown4", cols= "grey",
  cells.highlight= c(cluster_Memory_ncs),
  pt.size = 0.1
)

cluster_Plasma <- colnames(PC_RNA)

DimPlot(B_cell_lineage_clean, 
  cols.highlight = "cyan4", cols= "grey",
  cells.highlight= c(cluster_Plasma),
  pt.size = 0.1
)

cluster_Germinal <- colnames(GCBC_RNA)

cluster_Germinal_DZ <- colnames(GCBC_RNA)[GCBC_RNA$seurat_clusters == 5 | 
                                                    GCBC_RNA$seurat_clusters == 1 | 
                                                    GCBC_RNA$seurat_clusters == 3]

cluster_Germinal_LZ  <- colnames(GCBC_RNA)[GCBC_RNA$seurat_clusters == 4 |
                                             GCBC_RNA$seurat_clusters == 0 |
                                             GCBC_RNA$seurat_clusters == 6]

DimPlot(B_cell_lineage_clean, 
  cols.highlight = "green4", cols= "grey",
  cells.highlight= c(cluster_Germinal_DZ),
  pt.size = 0.1
)

DimPlot(B_cell_lineage_clean,
  cols.highlight = "green3", cols= "grey",
  cells.highlight= c(cluster_Germinal_LZ),
  pt.size = 0.1
)

5 Save

saveRDS(B_cell_lineage_clean, path_to_save)

6 Session Info

sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS/LAPACK: /Users/pauli/opt/anaconda3/envs/Tonsil_atlas/lib/libopenblasp-r0.3.10.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] Signac_1.1.0.9000    forcats_0.5.0        stringr_1.4.0        dplyr_1.0.2          purrr_0.3.4          readr_1.4.0          tidyr_1.1.2          tibble_3.0.4         ggplot2_3.3.2        tidyverse_1.3.0      harmony_1.0          Rcpp_1.0.5           SeuratWrappers_0.3.0 Seurat_3.9.9.9010    BiocStyle_2.16.1    
## 
## loaded via a namespace (and not attached):
##   [1] rappdirs_0.3.1              SnowballC_0.7.0             rtracklayer_1.48.0          GGally_2.0.0                bit64_4.0.5                 knitr_1.30                  irlba_2.3.3                 DelayedArray_0.14.0         data.table_1.13.2           rpart_4.1-15                RCurl_1.98-1.2              AnnotationFilter_1.12.0     generics_0.1.0              BiocGenerics_0.34.0         GenomicFeatures_1.40.1      cowplot_1.1.0               RSQLite_2.2.1               RANN_2.6.1                  future_1.20.1               bit_4.0.4                   spatstat.data_2.1-0         xml2_1.3.2                  lubridate_1.7.9             httpuv_1.5.4                SummarizedExperiment_1.18.1 assertthat_0.2.1            xfun_0.18                   hms_0.5.3                   evaluate_0.14               promises_1.1.1              fansi_0.4.1                 progress_1.2.2              dbplyr_1.4.4                readxl_1.3.1                igraph_1.2.6                DBI_1.1.0                   htmlwidgets_1.5.2           reshape_0.8.8               stats4_4.0.3                ellipsis_0.3.1              RSpectra_0.16-0             backports_1.2.0            
##  [43] bookdown_0.21               biomaRt_2.44.4              deldir_0.2-3                vctrs_0.3.4                 Biobase_2.48.0              remotes_2.2.0               here_1.0.1                  ensembldb_2.12.1            ROCR_1.0-11                 abind_1.4-5                 withr_2.3.0                 ggforce_0.3.2               BSgenome_1.56.0             checkmate_2.0.0             sctransform_0.3.1           GenomicAlignments_1.24.0    prettyunits_1.1.1           goftest_1.2-2               cluster_2.1.0               lazyeval_0.2.2              crayon_1.3.4                labeling_0.4.2              pkgconfig_2.0.3             tweenr_1.0.1                GenomeInfoDb_1.24.0         nlme_3.1-150                ProtGenerics_1.20.0         nnet_7.3-14                 rlang_0.4.11                globals_0.13.1              lifecycle_0.2.0             miniUI_0.1.1.1              BiocFileCache_1.12.1        modelr_0.1.8                rsvd_1.0.3                  dichromat_2.0-0             cellranger_1.1.0            rprojroot_2.0.2             polyclip_1.10-0             matrixStats_0.57.0          lmtest_0.9-38               graph_1.66.0               
##  [85] Matrix_1.3-2                ggseqlogo_0.1               zoo_1.8-8                   reprex_0.3.0                base64enc_0.1-3             ggridges_0.5.2              png_0.1-7                   viridisLite_0.3.0           bitops_1.0-6                KernSmooth_2.23-17          Biostrings_2.56.0           blob_1.2.1                  parallelly_1.21.0           jpeg_0.1-8.1                S4Vectors_0.26.0            scales_1.1.1                memoise_1.1.0               magrittr_1.5                plyr_1.8.6                  ica_1.0-2                   zlibbioc_1.34.0             compiler_4.0.3              RColorBrewer_1.1-2          fitdistrplus_1.1-1          Rsamtools_2.4.0             cli_2.1.0                   XVector_0.28.0              listenv_0.8.0               patchwork_1.1.0             pbapply_1.4-3               htmlTable_2.1.0             Formula_1.2-4               MASS_7.3-53                 mgcv_1.8-33                 tidyselect_1.1.0            stringi_1.5.3               yaml_2.2.1                  askpass_1.1                 latticeExtra_0.6-29         ggrepel_0.8.2               grid_4.0.3                  VariantAnnotation_1.34.0   
## [127] fastmatch_1.1-0             tools_4.0.3                 future.apply_1.6.0          parallel_4.0.3              rstudioapi_0.12             foreign_0.8-80              lsa_0.73.2                  gridExtra_2.3               farver_2.0.3                Rtsne_0.15                  digest_0.6.27               BiocManager_1.30.10         shiny_1.5.0                 GenomicRanges_1.40.0        broom_0.7.2                 later_1.1.0.1               RcppAnnoy_0.0.16            OrganismDbi_1.30.0          httr_1.4.2                  AnnotationDbi_1.50.3        ggbio_1.36.0                biovizBase_1.36.0           colorspace_2.0-0            rvest_0.3.6                 XML_3.99-0.3                fs_1.5.0                    tensor_1.5                  reticulate_1.18             IRanges_2.22.1              splines_4.0.3               uwot_0.1.8.9001             RBGL_1.64.0                 RcppRoll_0.3.0              spatstat.utils_2.1-0        plotly_4.9.2.1              xtable_1.8-4                jsonlite_1.7.1              spatstat_1.64-1             R6_2.5.0                    Hmisc_4.4-1                 pillar_1.4.6                htmltools_0.5.1.1          
## [169] mime_0.9                    glue_1.4.2                  fastmap_1.0.1               BiocParallel_1.22.0         codetools_0.2-17            lattice_0.20-41             curl_4.3                    leiden_0.3.5                openssl_1.4.3               survival_3.2-7              rmarkdown_2.5               munsell_0.5.0               GenomeInfoDbData_1.2.3      haven_2.3.1                 reshape2_1.4.4              gtable_0.3.0